Batch correction across tissues
I use data without patient 12 that might have gastric metaplasia samples.
sce.list <- readRDS("~/Dropbox/Postdoc/2019-12-29_BE2020/All_sce_filtered_high_quality.rds")
tissue.list <- list()
for(i in c("NE", "NG", "BE", "NSCJ", "BSCJ", "ND", "SMG")){
cur_sces <- sce.list[which(grepl(i, names(sce.list)))]
cur_sces <- lapply(cur_sces, function(n){
rowData(n) <- rowData(n)[,1:2]
n
})
# cur_sces <-lapply(cur_sces, function (x) {
# if(colnames(rowData(x))[3] == "Type") {
# colnames(rowData(x))[3]<-"is_feature_control"
# } else {
# colnames(rowData(x))[3]<-colnames(rowData(x))[3]
# }
# x
# })
# Order sce object by size
cur_sces <- cur_sces[order(unlist(lapply(cur_sces, ncol)), decreasing = TRUE)]
# Combine sce objects
tissue_sce <- do.call("cbind", cur_sces)
qclusters <- quickCluster(tissue_sce)
tissue_sce <- computeSumFactors(tissue_sce, clusters=qclusters)
tissue_sce <- logNormCounts(tissue_sce)
# Batch correction
corrected <- batch.correction(cur_sces)
# Save batch corrected counts in metdata
metadata(tissue_sce)$corrected <- corrected
# Compute tsne on corrected counts
set.seed(1234)
tsne <- Rtsne(t(corrected), pca = FALSE)
# Store tsne in slot
reducedDims(tissue_sce)$TSNE <- tsne$Y[,1:2]
# Clustering on corrected data
#dist.all <- as.dist(sqrt((1 - cor(as.matrix(corrected),
# method = "spearman"))/2))
#dendro <- hclust(dist.all, method = "ward.D2")
#cluster <- as.character(cutreeDynamic(dendro = dendro, distM = as.matrix(dist.all),
# minClusterSize = 10, deepSplit = 0))
if(i == "SMG") {
g <- buildSNNGraph(corrected, k = 4, d = NA) # Use of k = 4 allows for capture of ductal cells as a seperate cluster in SMG samples.
cluster <- igraph::cluster_louvain(g)$membership
} else {
g <- buildSNNGraph(corrected, k = 5, d = NA) # A good balance needed to separate the samples.
cluster <- igraph::cluster_louvain(g)$membership
}
# Save clustering in new slot
colData(tissue_sce)$Tissue_cluster <- cluster
# Visualize clustering
cur_p <- ggplot(data.frame(tSNE1 = reducedDims(tissue_sce)$TSNE[,1],
tSNE2 = reducedDims(tissue_sce)$TSNE[,2],
clusters = as.factor(colData(tissue_sce)$Tissue_cluster))) +
geom_point(aes(tSNE1, tSNE2, colour = clusters))+ggtitle(i)
print(cur_p)
cur_p <- ggplot(data.frame(tSNE1 = reducedDims(tissue_sce)$TSNE[,1],
tSNE2 = reducedDims(tissue_sce)$TSNE[,2],
clusters = as.factor(colData(tissue_sce)$Patient))) +
geom_point(aes(tSNE1, tSNE2, colour = clusters))+ggtitle(i)
print(cur_p)
# Visualize whether populations are patient specific
print(kable(table(cluster, colData(tissue_sce)$Patient),row.names = TRUE))
cat("\n")
# Perform differential expression
markers <- findMarkers(tissue_sce, groups = cluster, block = colData(tissue_sce)$Patient)
markers.spec <- lapply(markers, function(n){
if(!is.na(n$Top[1]) & !is.nan(sum(as.matrix(n[1,3:ncol(n)])))){
test_n <- !is.na(n[1,3:ncol(n)])[1,]
cur_n <- n[n$FDR < 0.1 & apply(n[,3:ncol(n)], 1, function(x){sum(x[test_n] > 0)}) == sum(test_n),]
if(nrow(cur_n) > 0){
cur_n$GeneName <- rowData(tissue_sce)$Symbol[match(rownames(cur_n), rowData(tissue_sce)$ID)]
}
}
else{
cur_n <- NULL
}
cur_n
})
if(!dir.exists(paste("~/Dropbox/Postdoc/2019-12-29_BE2020/Results/Marker_genes/Tissues_filtered/",
i, sep = ""))){
dir.create(paste("~/Dropbox/Postdoc/2019-12-29_BE2020/Results/Marker_genes/Tissues_filtered/",
i, sep = ""))
}
write.xlsx(markers.spec, paste("~/Dropbox/Postdoc/2019-12-29_BE2020/Results/Marker_genes/Tissues_filtered/", i, "/Marker_genes.xlsx", sep = ""))
tissue.list[[i]] <- tissue_sce
}


| 1 |
14 |
86 |
3 |
6 |
0 |
2 |
4 |
14 |
11 |
| 2 |
184 |
0 |
0 |
4 |
105 |
0 |
0 |
0 |
0 |
| 3 |
128 |
23 |
17 |
28 |
47 |
27 |
34 |
52 |
27 |
| 4 |
268 |
497 |
0 |
143 |
13 |
0 |
43 |
40 |
554 |
| 5 |
109 |
731 |
274 |
98 |
0 |
21 |
36 |
8 |
123 |
| 6 |
184 |
656 |
66 |
239 |
0 |
90 |
137 |
32 |
95 |
| 7 |
12 |
73 |
0 |
48 |
0 |
0 |
2 |
1 |
68 |
| 8 |
477 |
418 |
17 |
152 |
111 |
2 |
41 |
87 |
286 |
| 9 |
187 |
652 |
30 |
124 |
1 |
5 |
102 |
64 |
399 |
| 10 |
98 |
347 |
59 |
174 |
0 |
193 |
68 |
6 |
53 |
| 11 |
107 |
176 |
15 |
119 |
18 |
45 |
36 |
88 |
83 |
| 12 |
358 |
925 |
67 |
196 |
2 |
48 |
192 |
115 |
236 |
| 13 |
203 |
654 |
47 |
229 |
1 |
64 |
126 |
35 |
85 |
| 14 |
91 |
225 |
76 |
115 |
0 |
117 |
35 |
2 |
36 |
| 15 |
133 |
115 |
66 |
198 |
14 |
176 |
56 |
47 |
42 |
| 16 |
386 |
7 |
0 |
52 |
404 |
208 |
3 |
54 |
3 |
| 17 |
22 |
26 |
12 |
3 |
17 |
0 |
6 |
14 |
2 |

| 1 |
3 |
44 |
49 |
0 |
1 |
0 |
0 |
4 |
233 |
| 2 |
3 |
20 |
2 |
2 |
7 |
2 |
2 |
1 |
2 |
| 3 |
26 |
145 |
50 |
11 |
46 |
7 |
28 |
18 |
86 |
| 4 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
7 |
7 |
| 5 |
14 |
90 |
71 |
5 |
28 |
10 |
23 |
8 |
36 |
| 6 |
0 |
4 |
2 |
0 |
5 |
3 |
1 |
1 |
23 |
| 7 |
22 |
90 |
8 |
6 |
7 |
4 |
5 |
6 |
93 |
| 8 |
39 |
106 |
33 |
16 |
43 |
12 |
15 |
15 |
99 |
| 9 |
0 |
10 |
0 |
3 |
50 |
4 |
16 |
1 |
65 |
| 10 |
0 |
28 |
0 |
1 |
17 |
3 |
14 |
1 |
28 |
| 11 |
4 |
63 |
20 |
7 |
64 |
4 |
30 |
12 |
11 |
| 12 |
2 |
3 |
2 |
0 |
13 |
2 |
0 |
4 |
0 |
| 13 |
0 |
3 |
2 |
0 |
18 |
2 |
0 |
7 |
0 |
| 14 |
0 |
46 |
5 |
0 |
0 |
0 |
0 |
2 |
69 |
| 15 |
3 |
9 |
8 |
0 |
3 |
0 |
2 |
2 |
1 |
| 16 |
0 |
1 |
9 |
2 |
0 |
0 |
1 |
0 |
0 |
| 17 |
32 |
199 |
73 |
23 |
81 |
7 |
37 |
16 |
72 |
| 18 |
2 |
6 |
7 |
0 |
21 |
6 |
7 |
0 |
2 |
| 19 |
6 |
14 |
11 |
3 |
92 |
14 |
27 |
18 |
18 |
| 20 |
1 |
1 |
0 |
0 |
7 |
4 |
0 |
2 |
1 |
| 21 |
4 |
162 |
39 |
1 |
52 |
35 |
0 |
9 |
20 |
| 22 |
3 |
3 |
3 |
0 |
37 |
12 |
1 |
3 |
0 |
| 23 |
25 |
233 |
110 |
14 |
7 |
4 |
8 |
2 |
32 |
## Warning in .pairwise_blocked_template(x, clust.vals, nblocks, direction =
## direction, : no within-block comparison between 16 and 4
## Warning in .pairwise_blocked_template(x, clust.vals, nblocks, direction =
## direction, : no within-block comparison between 16 and 10
## Warning in .pairwise_blocked_template(x, clust.vals, nblocks, direction =
## direction, : no within-block comparison between 20 and 16



| 1 |
5 |
23 |
0 |
| 2 |
0 |
82 |
0 |
| 3 |
1 |
39 |
2 |
| 4 |
2 |
27 |
1 |
| 5 |
4 |
262 |
0 |
| 6 |
61 |
98 |
54 |
| 7 |
2 |
126 |
11 |
| 8 |
1 |
174 |
0 |
| 9 |
75 |
52 |
21 |
| 10 |
37 |
43 |
20 |
| 11 |
12 |
81 |
7 |
| 12 |
71 |
35 |
31 |
| 13 |
75 |
95 |
57 |
| 14 |
54 |
22 |
17 |
| 15 |
165 |
33 |
18 |
| 16 |
51 |
44 |
12 |
| 17 |
88 |
67 |
41 |
| 18 |
6 |
32 |
32 |
| 19 |
27 |
113 |
37 |


| 1 |
0 |
9 |
237 |
74 |
0 |
4 |
51 |
107 |
| 2 |
0 |
11 |
74 |
100 |
47 |
43 |
30 |
42 |
| 3 |
0 |
2 |
267 |
153 |
71 |
77 |
47 |
321 |
| 4 |
0 |
7 |
111 |
223 |
48 |
39 |
28 |
48 |
| 5 |
8 |
229 |
3 |
0 |
36 |
34 |
37 |
13 |
| 6 |
26 |
503 |
12 |
2 |
30 |
43 |
23 |
19 |
| 7 |
38 |
505 |
7 |
0 |
11 |
27 |
43 |
15 |
| 8 |
62 |
481 |
3 |
3 |
1 |
17 |
16 |
4 |
| 9 |
0 |
8 |
410 |
70 |
12 |
44 |
31 |
126 |
| 10 |
0 |
42 |
140 |
117 |
42 |
56 |
23 |
345 |
| 11 |
0 |
29 |
367 |
102 |
15 |
69 |
34 |
214 |
| 12 |
0 |
0 |
80 |
0 |
8 |
0 |
21 |
103 |
| 13 |
0 |
0 |
112 |
51 |
32 |
1 |
92 |
31 |
| 14 |
0 |
7 |
3 |
0 |
38 |
0 |
13 |
92 |
| 15 |
0 |
1 |
166 |
17 |
29 |
2 |
17 |
41 |
| 16 |
0 |
5 |
138 |
140 |
26 |
176 |
6 |
479 |
| 17 |
1 |
4 |
16 |
0 |
62 |
8 |
5 |
33 |
| 18 |
0 |
2 |
16 |
15 |
418 |
10 |
14 |
77 |
| 19 |
0 |
59 |
22 |
67 |
57 |
43 |
72 |
155 |
| 20 |
0 |
0 |
20 |
7 |
113 |
0 |
41 |
3 |
| 21 |
0 |
18 |
104 |
57 |
58 |
24 |
111 |
154 |
| 22 |
0 |
1 |
0 |
1 |
1 |
1 |
36 |
4 |
| 23 |
2 |
7 |
42 |
0 |
39 |
16 |
16 |
28 |
| 24 |
0 |
1 |
271 |
143 |
40 |
33 |
50 |
158 |
| 25 |
1 |
0 |
24 |
0 |
12 |
2 |
1 |
14 |
| 26 |
15 |
83 |
5 |
2 |
4 |
29 |
9 |
0 |


| 1 |
63 |
25 |
39 |
| 2 |
6 |
3 |
0 |
| 3 |
135 |
16 |
47 |
| 4 |
68 |
3 |
14 |
| 5 |
108 |
44 |
17 |
| 6 |
234 |
2 |
3 |
| 7 |
130 |
59 |
11 |
| 8 |
26 |
122 |
5 |
| 9 |
5 |
115 |
21 |
| 10 |
51 |
58 |
26 |
| 11 |
30 |
79 |
30 |
| 12 |
27 |
71 |
20 |
| 13 |
1 |
5 |
1 |
| 14 |
88 |
32 |
33 |
| 15 |
3 |
13 |
6 |
| 16 |
24 |
7 |
39 |
| 17 |
9 |
18 |
6 |
| 18 |
50 |
24 |
34 |
| 19 |
18 |
23 |
130 |
| 20 |
11 |
27 |
2 |
| 21 |
4 |
18 |
45 |


| 1 |
0 |
7 |
0 |
7 |
| 2 |
1 |
4 |
1 |
7 |
| 3 |
0 |
6 |
0 |
4 |
| 4 |
68 |
9 |
34 |
211 |
| 5 |
15 |
14 |
35 |
116 |
| 6 |
25 |
75 |
53 |
157 |
| 7 |
32 |
16 |
19 |
64 |
| 8 |
18 |
53 |
27 |
64 |
| 9 |
75 |
139 |
62 |
159 |
| 10 |
17 |
28 |
14 |
48 |
| 11 |
51 |
4 |
17 |
50 |
| 12 |
71 |
112 |
39 |
111 |
| 13 |
5 |
0 |
0 |
2 |
| 14 |
30 |
11 |
11 |
22 |
| 15 |
5 |
3 |
13 |
17 |
| 16 |
51 |
12 |
22 |
138 |
| 17 |
24 |
101 |
87 |
145 |
| 18 |
22 |
155 |
21 |
178 |
| 19 |
50 |
50 |
77 |
141 |
| 20 |
10 |
7 |
13 |
12 |


| 1 |
0 |
6 |
1 |
| 2 |
61 |
163 |
1 |
| 3 |
15 |
126 |
5 |
| 4 |
0 |
67 |
0 |
| 5 |
2 |
81 |
11 |
| 6 |
63 |
187 |
3 |
| 7 |
59 |
126 |
0 |
| 8 |
10 |
57 |
0 |
| 9 |
3 |
57 |
0 |
| 10 |
162 |
46 |
37 |
| 11 |
23 |
46 |
0 |
| 12 |
17 |
208 |
0 |
| 13 |
51 |
232 |
0 |
| 14 |
227 |
20 |
23 |
| 15 |
41 |
118 |
5 |
| 16 |
18 |
41 |
105 |
| 17 |
26 |
32 |
0 |
| 18 |
12 |
122 |
0 |
| 19 |
30 |
70 |
72 |
| 20 |
1 |
106 |
120 |
saveRDS(tissue.list, "~/Dropbox/Postdoc/2019-12-29_BE2020/Tissue_sce_filtered.rds")